/* Submit display-manager commands that clear the Log and Output windows. */
DM 'CLEAR LOG; CLEAR OUTPUT;';

/* Library holding all duration data sets read and written below.
   The path is a placeholder and must be replaced before running. */
libname duration 'path to be completed path to duration data sets';
/* Set the procedure-output title to blanks. */
title '                   ';


/******************************WITHOUT Min WAGE**************/
/*************Stack all three data sets containing only wage agreements at firm- or industry levels*/

/*duree_accords_br : contains observed durations between two industry-level agreements*/
/*main variables:
D23: sector
SIREN: firm identifier
YP: number of employees
ann_acc: year of agreement
code_dept: local departement
mois_acc: month of agreement
t_ents_yp: size class*/

/*duree_acc_ents : contains observed durations between two firm-level agreements*/
/*main variables:
D23: sector
SIREN: firm identifier
YP: number of employees
ann_acc: year of agreement
code_dept: local departement
mois_acc: month of agreement
t_ents_yp: size class*/

/*duree_acc_debfin : contains dates of beginning and end of sample firms*/
/*main variables:
D23: sector
SIREN: firm identifier
YP: number of employees
code_dept: local departement
t_ents_yp: size class
indic_deb: flag (=1) marking the date of first observation of the firm in the sample
indic_fin: flag (=1) marking the date of last observation of the firm in the sample
*/ 

/* Stack industry-level agreements, firm-level agreements and the firms'
   sample entry/exit records, define the censoring dummies and keep only the
   useful variables:
     D23 (sector), SIREN (firm identifier), ann_acc (year of agreement),
     mois_acc (month of agreement), code_dept (geographical location),
     indic_cg / indic_cd (left / right censoring dummies),
     t_ents_yp (size class of the firm), yp (number of employees). */
data duration.duree_accords_br_ents;
    set duration.duree_accords_br
        duration.duree_acc_ents
        duration.duree_acc_debfin;

    /* Right censoring: 1 on the record flagged as the firm's last observation */
    indic_cd = (indic_fin = 1);
    /* Left censoring: 1 on the record flagged as the firm's first observation */
    indic_cg = (indic_deb = 1);

    keep D23 SIREN ann_acc code_dept indic_cd indic_cg mois_acc t_ents_yp yp;
run;

/* Order by firm and date and delete duplicated firm x year x month records.
   PROC SORT keeps the relative input order of tied records, so the record
   retained for a duplicated key is the first one in stacking order. */
proc sort data=duration.duree_accords_br_ents nodupkey;
    by siren ann_acc mois_acc;
run;

/* Compute durations (in months) between two successive agreements (industry-
   or firm-level) of the same firm, and update the left-censoring code indic_cg.
   The input is sorted by siren ann_acc mois_acc (see the PROC SORT just above).

   indic_cg rules, applied in this order (a later rule overrides an earlier one):
     - previous record of the same firm had indic_cg = 1   -> 2
     - record dated 1993                                    -> 1
     - previous record of the same firm dated 1993          -> 2
     - record two rows back, same firm, dated 1993          -> 0

   The two look-back tests on 1993 are restricted to records of the same firm:
   without that restriction the last 1993 records of one firm would change the
   flags of the first records of the next firm in sort order. */
data duration.duree_accords_br_ents;
    set duration.duree_accords_br_ents;

    /* LAG is called unconditionally on every row so that its queues stay
       aligned with the observations; l_cg holds the flag as read from the
       input, before any of the updates below. */
    l_an    = lag(ann_acc);
    l_mois  = lag(mois_acc);
    l_siren = lag(siren);
    l_cg    = lag(indic_cg);

    if l_siren = siren then duree = (ann_acc - l_an) * 12 + mois_acc - l_mois;
    if l_siren = siren and l_cg = 1 then indic_cg = 2;

    l2_an    = lag(l_an);      /* year of the record two rows back */
    l2_siren = lag2(siren);    /* firm of the record two rows back */

    if ann_acc = 1993 then indic_cg = 1;
    if l_siren  = siren and l_an  = 1993 then indic_cg = 2;
    if l2_siren = siren and l2_an = 1993 then indic_cg = 0;

    /* helper only: keep the output columns unchanged */
    drop l2_siren;
run;




/******************************WITH Min WAGE**************/

/*************Stack all 4 data sets containing only wage agreements at firm- or industry levels + NMW */

/*duree_accords_br : contains observed durations between two industry-level agreements*/
/*duree_acc_ents : contains observed durations between two firm-level agreements*/
/*duree_acc_debfin : contains dates of beginning and end of sample firms*/
/*duree_accord_smic : dates of NMW increases*/

/* Stack the four sources (industry-level agreements, firm-level agreements,
   firms' sample entry/exit records, NMW increases) and define the censoring
   dummies from the entry/exit flags. */
data duration.duree_accords_br_ents_smic1;
    set duration.duree_accords_br
        duration.duree_acc_ents
        duration.duree_acc_debfin
        duration.duree_accord_smic;

    /* Right censoring: 1 on the record flagged as the firm's last observation */
    indic_cd = (indic_fin = 1);
    /* Left censoring: 1 on the record flagged as the firm's first observation */
    indic_cg = (indic_deb = 1);

    drop an l_an l_mois l_siren;
run;

/* Chronological order within firm */
proc sort data=duration.duree_accords_br_ents_smic1;
    by siren ann_acc mois_acc;
run;

/****************************************************************/
/* We duplicate the whole data set, creating two further data sets:
	- one accounting for minimum wage earners: the data set contains the proportion of NMW earners in a given industry 
at a given date and is associated with all firms. We then calculate the number of employees in a given firm covered by the NMW;
	- one accounting for other workers: the data set contains the proportion of other workers (100 - % of NMW earners) in a given industry 
at a given date and is associated with all firms. We then calculate the number of employees in a given firm not covered by the NMW.

Then we stack the two data sets and compute the durations of wage agreements, including the NMW, weighted by the number of employees. */

/* duration.duree_accord_smic1 contains, for each firm x year, the % of
   minimum wage earners in the firm's industry x departement (geographical
   location). It is attached to the stacked agreements by firm and year. */

/* Give the year variable the same name as in the agreements panel */
data duration.duree_accord_smic1;
    set duration.duree_accord_smic1;
    rename an = ann_acc;
run;

/* Both inputs of the merge must be ordered by the BY variables */
proc sort data=duration.duree_accords_br_ents_smic1;
    by siren ann_acc;
run;

proc sort data=duration.duree_accord_smic1;
    by siren ann_acc;
run;

data duration.duree_accords_br_ents_smic1b;
    merge duration.duree_accords_br_ents_smic1
          duration.duree_accord_smic1;
    by siren ann_acc;
run;
/* Propagate p_smic2 to every record of the same firm x year, then delete
   duplicated firm x year x month records.

   The fill is done with a RETAINed value inside each siren x ann_acc BY group,
   first forward and then backward. A LAG-based fill only looks at the previous
   row as it was read from the input (not as it was just filled), so it cannot
   carry a value further than one row: with three or more records in a
   firm-year some records would keep a missing p_smic2.

   NOTE(review): unlike the branch for workers not covered by the NMW, records
   with a missing mois_acc are not deleted here - confirm this is intended. */

/* Forward fill (input is ordered by siren ann_acc after the merge) */
data duration.duree_accords_br_ents_smic1b;
    set duration.duree_accords_br_ents_smic1b;
    by siren ann_acc;

    retain _psmic2_fill;
    if first.ann_acc then _psmic2_fill = .;      /* never carry across firm-years */
    if p_smic2 = . then p_smic2 = _psmic2_fill;
    else _psmic2_fill = p_smic2;

    /* l_an, l_siren and l_psmic2 are listed only so that the output columns
       do not depend on whether an input already carried them */
    drop _psmic2_fill l_an l_siren l_psmic2 nes114;
run;

/* Backward fill: same logic on the reversed chronological order */
proc sort data=duration.duree_accords_br_ents_smic1b;
    by siren descending ann_acc descending mois_acc;
run;

data duration.duree_accords_br_ents_smic1b;
    set duration.duree_accords_br_ents_smic1b;
    by siren descending ann_acc;

    retain _psmic2_fill;
    if first.ann_acc then _psmic2_fill = .;
    if p_smic2 = . then p_smic2 = _psmic2_fill;
    else _psmic2_fill = p_smic2;

    /* The entry/exit flags are no longer needed: indic_cg / indic_cd carry them */
    drop _psmic2_fill l_nes indic_deb indic_fin;
run;

/* Back to chronological order, one record per firm x year x month.
   PROC SORT keeps the relative input order of tied records. */
proc sort data=duration.duree_accords_br_ents_smic1b nodupkey;
    by siren ann_acc mois_acc;
run;

/* Compute durations (in months) between two successive agreements (industry-
   or firm-level, or NMW increases) of the same firm, for workers covered by
   the NMW, and update the left-censoring code indic_cg.
   The input is sorted by siren ann_acc mois_acc (see the PROC SORT just above).

   indic_cg rules, applied in this order (a later rule overrides an earlier one):
     - previous record of the same firm had indic_cg = 1   -> 2
     - record dated 1993                                    -> 1
     - previous record of the same firm dated 1993          -> 2
     - record two rows back, same firm, dated 1993          -> 0

   The two look-back tests on 1993 are restricted to records of the same firm:
   without that restriction the last 1993 records of one firm would change the
   flags of the first records of the next firm in sort order. */
data duration.duree_accords_br_ents_smic1b;
    set duration.duree_accords_br_ents_smic1b;

    /* LAG is called unconditionally on every row so that its queues stay
       aligned with the observations; l_cg holds the flag as read from the
       input, before any of the updates below. */
    l_an    = lag(ann_acc);
    l_mois  = lag(mois_acc);
    l_siren = lag(siren);
    l_cg    = lag(indic_cg);

    if l_siren = siren then duree = (ann_acc - l_an) * 12 + mois_acc - l_mois;
    if l_siren = siren and l_cg = 1 then indic_cg = 2;

    l2_an    = lag(l_an);      /* year of the record two rows back */
    l2_siren = lag2(siren);    /* firm of the record two rows back */

    if ann_acc = 1993 then indic_cg = 1;
    if l_siren  = siren and l_an  = 1993 then indic_cg = 2;
    if l2_siren = siren and l2_an = 1993 then indic_cg = 0;

    /* helper only: keep the output columns unchanged */
    drop l2_siren;
run;




/* duration.duree_accord_smic0 contains, for each firm x year, the % of workers
   not covered by the NMW in the firm's industry x departement (geographical
   location).

   Agreements panel used for these workers: industry-level agreements,
   firm-level agreements and the firms' sample entry/exit records (the NMW
   increases are not stacked here). */
data duration.duree_accords_br_ents_smic0;
    set duration.duree_accords_br
        duration.duree_acc_ents
        duration.duree_acc_debfin;

    /* Right censoring: 1 on the record flagged as the firm's last observation */
    indic_cd = (indic_fin = 1);
    /* Left censoring: 1 on the record flagged as the firm's first observation */
    indic_cg = (indic_deb = 1);

    drop an l_an l_mois l_siren indic_deb indic_fin;
run;

/* Chronological order within firm */
proc sort data=duration.duree_accords_br_ents_smic0;
    by siren ann_acc mois_acc;
run;

/* Attach the firm x year figures of duration.duree_accord_smic0 to the
   stacked agreements, by firm and year. */

/* Give the year variable the same name as in the agreements panel */
data duration.duree_accord_smic0;
    set duration.duree_accord_smic0;
    rename an = ann_acc;
run;

/* Both inputs of the merge must be ordered by the BY variables */
proc sort data=duration.duree_accords_br_ents_smic0;
    by siren ann_acc;
run;

proc sort data=duration.duree_accord_smic0;
    by siren ann_acc;
run;

data duration.duree_accords_br_ents_smic0b;
    merge duration.duree_accords_br_ents_smic0
          duration.duree_accord_smic0;
    by siren ann_acc;
run;
/* Propagate p_smic2 to every record of the same firm x year, delete the
   records without a month of agreement, then delete duplicated
   firm x year x month records.

   The fill is done with a RETAINed value inside each siren x ann_acc BY group,
   first forward and then backward. A LAG-based fill only looks at the previous
   row as it was read from the input (not as it was just filled), so it cannot
   carry a value further than one row: with three or more records in a
   firm-year some records would keep a missing p_smic2. */

/* Forward fill (input is ordered by siren ann_acc after the merge) */
data duration.duree_accords_br_ents_smic0b;
    set duration.duree_accords_br_ents_smic0b;
    by siren ann_acc;

    retain _psmic2_fill;
    if first.ann_acc then _psmic2_fill = .;      /* never carry across firm-years */
    if p_smic2 = . then p_smic2 = _psmic2_fill;
    else _psmic2_fill = p_smic2;

    /* l_an, l_siren and l_psmic2 are listed only so that the output columns
       do not depend on whether an input already carried them */
    drop _psmic2_fill l_an l_siren l_psmic2 nes114;
run;

/* Backward fill: same logic on the reversed chronological order */
proc sort data=duration.duree_accords_br_ents_smic0b;
    by siren descending ann_acc descending mois_acc;
run;

data duration.duree_accords_br_ents_smic0b;
    set duration.duree_accords_br_ents_smic0b;
    by siren descending ann_acc;

    retain _psmic2_fill;
    if first.ann_acc then _psmic2_fill = .;
    if p_smic2 = . then p_smic2 = _psmic2_fill;
    else _psmic2_fill = p_smic2;

    /* Records without a month of agreement are removed only after they have
       taken part in the fill above */
    if mois_acc = . then delete;

    drop _psmic2_fill l_nes indic_deb indic_fin;
run;

/* Back to chronological order, one record per firm x year x month.
   PROC SORT keeps the relative input order of tied records. */
proc sort data=duration.duree_accords_br_ents_smic0b nodupkey;
    by siren ann_acc mois_acc;
run;



/* Compute durations (in months) between two successive agreements (industry-
   or firm-level) of the same firm, for workers NOT covered by the NMW, and
   update the left-censoring code indic_cg.
   The input is sorted by siren ann_acc mois_acc (see the PROC SORT above).

   indic_cg rules, applied in this order (a later rule overrides an earlier one):
     - previous record of the same firm had indic_cg = 1   -> 2
     - record dated 1993                                    -> 1
     - previous record of the same firm dated 1993          -> 2
     - record two rows back, same firm, dated 1993          -> 0

   The two look-back tests on 1993 are restricted to records of the same firm:
   without that restriction the last 1993 records of one firm would change the
   flags of the first records of the next firm in sort order. */
data duration.duree_accords_br_ents_smic0b;
    set duration.duree_accords_br_ents_smic0b;

    /* LAG is called unconditionally on every row so that its queues stay
       aligned with the observations; l_cg holds the flag as read from the
       input, before any of the updates below. */
    l_an    = lag(ann_acc);
    l_mois  = lag(mois_acc);
    l_siren = lag(siren);
    l_cg    = lag(indic_cg);

    if l_siren = siren then duree = (ann_acc - l_an) * 12 + mois_acc - l_mois;
    if l_siren = siren and l_cg = 1 then indic_cg = 2;

    l2_an    = lag(l_an);      /* year of the record two rows back */
    l2_siren = lag2(siren);    /* firm of the record two rows back */

    if ann_acc = 1993 then indic_cg = 1;
    if l_siren  = siren and l_an  = 1993 then indic_cg = 2;
    if l2_siren = siren and l2_an = 1993 then indic_cg = 0;

    /* helper only: keep the output columns unchanged */
    drop l2_siren;
run;



/* Stack the two data sets (workers not covered / covered by the NMW) and
   compute the weight yp_smic: the firm's number of employees (yp) times the
   percentage p_smic2 attached to the record. The lag helpers created by the
   duration steps are discarded. */
data duration.duree_accords_br_ents_smic10b;
    set duration.duree_accords_br_ents_smic0b
        duration.duree_accords_br_ents_smic1b;

    yp_smic = yp * p_smic2 / 100;

    drop l2_an l_an l_cg l_mois l_siren;
run;


/****************************** COMPUTE descriptive statistics on durations *********************/
/* Duration between two wage agreements (weighted by number of employees) */
/* TABLE 1 line 1 */

/* Weighted distribution of durations; only indic_cg = 0 is required here,
   the restriction on indic_cd is not applied. */
proc univariate data=duration.duree_accords_br_ents_smic10b;
    where indic_cg = 0;
    var duree;
    weight yp_smic;
run;

/* Weighted frequencies of durations for records with both censoring flags at 0 */
proc freq data=duration.duree_accords_br_ents_smic10b;
    where indic_cg = 0 and indic_cd = 0;
    tables duree;
    weight yp_smic;
run;
quit;
/* Density function and hazard rate - Figure 1.
   Life-table estimates (method=lt) on monthly intervals 1..40 for records with
   indic_cg = 0; indic_cd = 1 marks right-censored durations. The estimates are
   written to duration.res_dur_acc_smic_9405 and printed output is suppressed.

   yp_smic is a fractional head-count (yp * p_smic2 / 100). Without NOTRUNCATE
   the FREQ statement truncates it to an integer and ignores records whose
   value is below 1, which is inconsistent with the WEIGHT yp_smic used for the
   Table 1 statistics.
   NOTE(review): this changes the estimates relative to the truncated version;
   remove "/ notruncate" to reproduce earlier output exactly. */
proc lifetest data=duration.duree_accords_br_ents_smic10b (where=(indic_cg=0))
              method=lt
              intervals=(1 to 40 by 1)
              plots=(h s p) maxtime=50
              outsurv=duration.res_dur_acc_smic_9405
              noprint;
    time duree*indic_cd(1);
    freq yp_smic / notruncate;
run;
quit;


